Main question: at this point we’re interested in one single classification, i.e. what predicts whether people do maskless contacts with non-householders
sessionInfo()
R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16
Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods base
other attached packages:
[1] catboost_0.26 rpart_4.1-15 rattle_5.4.0 bitops_1.0-7 tibble_3.1.2 doParallel_1.0.16 iterators_1.0.13 foreach_1.5.1
[9] cvms_1.3.0 tidyr_1.1.3 randomForest_4.6-14 caret_6.0-88 lattice_0.20-41 DataExplorer_0.8.2 faux_1.0.0 dplyr_1.0.7
[17] magrittr_2.0.1 parsnip_0.1.6 ggplot2_3.3.4
loaded via a namespace (and not attached):
[1] Rcpp_1.0.6 lubridate_1.7.10 class_7.3-17 assertthat_0.2.1 digest_0.6.27 ipred_0.9-11 utf8_1.2.1 R6_2.5.0
[9] plyr_1.8.6 stats4_4.0.3 evaluate_0.14 pillar_1.6.1 tictoc_1.0.1 rlang_0.4.11 data.table_1.14.0 Matrix_1.2-18
[17] rmarkdown_2.5 splines_4.0.3 gower_0.2.2 stringr_1.4.0 htmlwidgets_1.5.2 igraph_1.2.6 munsell_0.5.0 compiler_4.0.3
[25] xfun_0.19 pkgconfig_2.0.3 htmltools_0.5.0 nnet_7.3-14 tidyselect_1.1.1 gridExtra_2.3 prodlim_2019.11.13 codetools_0.2-16
[33] fansi_0.5.0 crayon_1.4.1 withr_2.4.2 MASS_7.3-53 recipes_0.1.16 ModelMetrics_1.2.2.2 grid_4.0.3 jsonlite_1.7.2
[41] nlme_3.1-149 gtable_0.3.0 lifecycle_1.0.0 DBI_1.1.1 pROC_1.17.0.1 scales_1.1.1 stringi_1.6.2 reshape2_1.4.4
[49] timeDate_3043.102 ellipsis_0.3.2 generics_0.1.0 vctrs_0.3.8 lava_1.6.9 tools_4.0.3 glue_1.4.2 purrr_0.3.4
[57] networkD3_0.4 survival_3.2-7 colorspace_2.0-1 knitr_1.30
df <- read.csv("data/shield_gjames_21-06-10.csv")
grouping_var <- "behaviour_unmasked"
# feature_list <- colnames(df[, !(names(df) %in% c(grouping_var, "id"))])
# feature_list <- c('intention_indoor_meeting', 'norms_people_present_indoors',
# 'sdt_motivation_extrinsic_2', 'sdt_motivation_identified_4', 'norms_family_friends', 'norms_risk_groups', 'norms_officials',
# 'norms_people_present_indoors')
if (grouping_var == "behaviour_unmasked") {
# df <- df %>% mutate(tmp = if_else(!!as.symbol(grouping_var) != 5, 'bad', 'good'))
df <- df %>% mutate(tmp = if_else(!!as.symbol(grouping_var) != 5, 0, 1))
names(df)[names(df) == 'tmp'] <- paste0(grouping_var, "_bool")
}
df[, paste0(grouping_var, "_bool")] <- as.factor(df[, paste0(grouping_var, "_bool")])
# df %<>%
# mutate_each_(funs(factor(.)), colnames(df))
# str(df)
ordinal_vars_mydata <- ordering_lookup %>%
dplyr::filter(varname %in% names(df)) %>%
dplyr::filter(ordering == "ordered")
df <- df %>%
# Ordered variables as ordinal factors
dplyr::mutate(across(.cols = ordinal_vars_mydata$varname,
~factor(., ordered = TRUE))) %>%
# Everything else as unordered factors
dplyr::mutate(across(.cols = -ordinal_vars_mydata$varname,
~factor(.))) %>%
# Fix ordering in the intention variables
dplyr::mutate(across(.cols = contains("intention_"),
~dplyr::recode_factor(.,
"1" = "4",
"2" = "1",
"3" = "2",
"4" = "3",
.ordered = TRUE)))
str(df)
'data.frame': 2272 obs. of 94 variables:
$ id : Factor w/ 2272 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
$ demographic_gender : Factor w/ 2 levels "1","2": 1 2 1 1 1 2 2 2 1 1 ...
$ demographic_age : Ord.factor w/ 5 levels "18-29"<"30-39"<..: 4 2 1 5 5 4 1 2 5 5 ...
$ demographic_4_areas : Factor w/ 4 levels "1","2","3","4": 1 2 1 1 2 1 1 4 4 1 ...
$ demographic_8_areas : Factor w/ 8 levels "1","2","3","4",..: 2 6 2 2 7 1 2 6 6 7 ...
$ behaviour_indoors_nonhouseholders: Ord.factor w/ 6 levels "1"<"2"<"3"<"4"<..: 5 5 3 4 5 3 5 5 4 5 ...
$ behaviour_close_contact : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 4 4 2 3 4 3 4 4 4 3 ...
$ behaviour_quarantined : Factor w/ 3 levels "1","2","3": 2 2 2 2 2 2 2 2 2 2 ...
$ behaviour_unmasked : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 5 5 2 2 4 3 5 3 4 5 ...
$ mask_wearing_cloth_mask : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 2 1 1 1 ...
$ mask_wearing_disposable_mask : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 1 2 1 1 ...
$ mask_wearing_certified_mask : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 2 1 ...
$ mask_wearing_ffp2 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
$ mask_wearing_vizire : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ mask_wearing_none : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ mask_wearing_other : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 1 1 ...
$ mask_wearing_reuse : Factor w/ 5 levels "1","2","3","4",..: 2 4 2 5 3 2 5 4 2 4 ...
$ intention_store : Ord.factor w/ 4 levels "4"<"1"<"2"<"3": 1 1 1 1 1 1 1 3 1 1 ...
$ intention_public_transport : Ord.factor w/ 4 levels "4"<"1"<"2"<"3": 4 1 1 1 1 1 1 4 1 1 ...
$ intention_indoor_meeting : Ord.factor w/ 4 levels "4"<"1"<"2"<"3": 1 3 3 2 2 3 3 3 2 1 ...
$ intention_restaurant : Ord.factor w/ 4 levels "4"<"1"<"2"<"3": 2 2 2 2 1 1 2 3 1 2 ...
$ intention_pa : Ord.factor w/ 4 levels "4"<"1"<"2"<"3": 2 2 4 4 3 2 4 3 2 4 ...
$ automaticity_carry_mask : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 3 5 6 7 6 7 1 5 6 ...
$ automaticity_put_on_mask : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 4 6 6 6 7 7 7 1 6 6 ...
$ post_covid_maskwearing_if_reccd : Factor w/ 4 levels "1","2","3","4": 3 4 4 3 1 1 4 4 1 1 ...
$ inst_attitude_protects_self : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 6 4 4 4 6 7 4 4 6 ...
$ inst_attitude_protects_others : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 7 7 6 7 6 7 4 6 6 ...
$ inst_attitude_sense_of_community : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 4 4 4 6 7 4 7 1 5 6 ...
$ inst_attitude_enough_oxygen : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 3 7 6 7 4 7 1 5 3 ...
$ inst_attitude_no_needless_waste : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 3 1 7 6 7 4 7 1 5 1 ...
$ norms_family_friends : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 7 7 6 7 7 7 1 4 7 ...
$ norms_risk_groups : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 5 7 4 6 7 7 7 2 7 7 ...
$ norms_officials : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 7 7 7 6 7 7 7 7 7 7 ...
$ norms_people_present_indoors : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 5 7 7 6 7 4 7 4 6 7 ...
$ aff_attitude_comfortable : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 4 5 5 3 4 6 1 5 2 ...
$ aff_attitude_calm : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 4 3 7 6 7 5 6 3 6 3 ...
$ aff_attitude_safe : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 3 4 5 5 4 5 7 5 6 5 ...
$ aff_attitude_responsible : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 6 7 5 7 6 7 4 7 6 ...
$ aff_attitude_difficult_breathing : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 4 1 3 2 5 2 6 5 5 ...
$ barriers_nothing : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 1 1 ...
$ barriers_money : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ barriers_forget_carry : Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 1 2 1 ...
$ barriers_forget_wear : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...
$ barriers_group_pressure : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ barriers_medical_symptoms : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ barriers_skin : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
$ barriers_difficult_breathing : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 2 ...
$ barriers_eyeglasses_fog : Factor w/ 2 levels "0","1": 1 2 2 2 1 2 1 2 1 2 ...
$ barriers_raspyvoice : Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 2 1 1 ...
$ barriers_headache : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ barriers_drymouth : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
$ barriers_earpain : Factor w/ 2 levels "0","1": 1 2 1 1 1 2 1 1 1 1 ...
$ barriers_general_uncomfy : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 2 1 2 ...
$ barriers_other : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ effective_means_handwashing : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 1 7 5 7 6 7 7 7 7 ...
$ effective_means_masks : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 5 5 1 5 7 6 6 1 7 7 ...
$ effective_means_distance : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 7 4 1 7 7 5 5 7 7 7 ...
$ effective_means_ventilation : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 7 4 4 7 7 5 5 4 6 7 ...
$ risk_likely_contagion : Ord.factor w/ 8 levels "1"<"2"<"3"<"4"<..: 2 4 4 3 2 2 2 3 3 1 ...
$ risk_contagion_absent_protection : Ord.factor w/ 8 levels "1"<"2"<"3"<"4"<..: 6 5 6 5 6 5 6 3 6 4 ...
$ risk_severity : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 2 5 6 4 5 3 1 4 7 ...
$ risk_fear_spread : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 4 5 7 4 6 5 7 4 3 7 ...
$ risk_fear_contagion_self : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 7 3 5 6 4 5 3 3 4 7 ...
$ risk_fear_contagion_others : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 7 7 6 6 7 6 7 7 4 7 ...
$ risk_fear_restrictions : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 5 3 1 3 1 4 1 7 3 4 ...
$ sdt_needs_autonomy_1 : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 2 3 5 3 5 2 5 2 4 2 ...
$ sdt_needs_competence_1 : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 3 4 5 4 5 4 5 4 4 3 ...
$ sdt_needs_relatedness_1 : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 3 5 1 4 5 4 5 1 5 4 ...
$ sdt_needs_autonomy_2 : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 2 4 2 3 5 4 5 1 4 4 ...
$ sdt_needs_competence_2 : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 3 5 5 2 4 4 5 3 4 3 ...
$ sdt_needs_relatedness_2 : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 3 2 4 3 5 4 5 2 5 4 ...
$ sdt_motivation_extrinsic1 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 1 1 2 1 2 2 1 4 1 ...
$ sdt_motivation_amotivation_1 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 1 1 1 2 1 1 1 5 2 1 ...
$ sdt_motivation_identified_1 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 7 7 5 6 7 6 7 4 7 7 ...
$ sdt_motivation_introjected_1 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 5 5 1 3 6 3 5 1 6 6 ...
$ sdt_motivation_extrinsic_2 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 4 1 2 2 4 1 5 2 1 ...
$ sdt_motivation_introjected_2 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 5 1 5 6 5 7 1 6 4 ...
$ sdt_motivation_amotivation_2 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 1 1 1 1 1 2 1 5 1 1 ...
$ sdt_motivation_extrinsic_3 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 1 2 1 4 1 5 1 6 2 1 ...
$ sdt_motivation_identified_2 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 3 7 3 6 7 5 7 1 6 6 ...
$ sdt_motivation_identified_3 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 6 7 2 6 7 5 7 1 7 6 ...
$ sdt_motivation_identified_4 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 4 7 4 5 7 5 7 1 6 6 ...
$ sdt_motivation_amotivation_3 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 1 1 2 1 1 1 6 1 1 ...
$ sdt_motivation_introjected_3 : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 7 3 5 6 5 5 4 6 6 ...
$ attention_check : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 1 1 2 1 1 1 1 1 1 ...
$ vaccination_status_intention_self: Ord.factor w/ 5 levels "4"<"1"<"2"<"3"<..: 1 2 3 1 1 1 3 4 1 2 ...
$ vaccination_status_closeones : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 2 1 2 4 2 3 2 4 4 ...
$ covid_tested : Factor w/ 4 levels "1","2","3","4": 1 3 2 2 3 1 2 2 2 2 ...
$ had_covid : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 2 5 2 1 1 4 1 2 1 ...
$ demographic_risk_group : Factor w/ 3 levels "1","2","3": 2 2 2 1 2 2 2 2 2 3 ...
$ needprotection_before_shots : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 1 1 1 2 1 1 1 4 1 1 ...
$ needprotection_after_1_shot : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 2 2 1 2 1 2 1 4 1 1 ...
$ needprotection_after_2_shots : Ord.factor w/ 7 levels "1"<"2"<"3"<"4"<..: 3 5 7 2 3 3 3 4 1 1 ...
$ behaviour_unmasked_bool : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 2 1 1 2 ...
# Exploratory data analysis
plot_intro(df)
plot_bar(df)
1 columns ignored with more than 50 categories.
id: 2272 categories
plot_correlation(df)
1 features with more than 20 categories ignored!
id: 2272 categories
head(df[, c(paste0(grouping_var, "_bool"), grouping_var)])
x <- df %>%
select(-behaviour_unmasked_bool, -behaviour_unmasked, -id) %>%
as.data.frame()
y <- df$behaviour_unmasked_bool
set.seed(2021)
inTrain <- createDataPartition(y, p = .80, list = FALSE)[,1]
x_train <- x[ inTrain, ]
x_test <- x[-inTrain, ]
y_train <- y[ inTrain]
y_test <- y[-inTrain]
colnames(x_train)
[1] "demographic_gender" "demographic_age" "demographic_4_areas" "demographic_8_areas"
[5] "behaviour_indoors_nonhouseholders" "behaviour_close_contact" "behaviour_quarantined" "mask_wearing_cloth_mask"
[9] "mask_wearing_disposable_mask" "mask_wearing_certified_mask" "mask_wearing_ffp2" "mask_wearing_vizire"
[13] "mask_wearing_none" "mask_wearing_other" "mask_wearing_reuse" "intention_store"
[17] "intention_public_transport" "intention_indoor_meeting" "intention_restaurant" "intention_pa"
[21] "automaticity_carry_mask" "automaticity_put_on_mask" "post_covid_maskwearing_if_reccd" "inst_attitude_protects_self"
[25] "inst_attitude_protects_others" "inst_attitude_sense_of_community" "inst_attitude_enough_oxygen" "inst_attitude_no_needless_waste"
[29] "norms_family_friends" "norms_risk_groups" "norms_officials" "norms_people_present_indoors"
[33] "aff_attitude_comfortable" "aff_attitude_calm" "aff_attitude_safe" "aff_attitude_responsible"
[37] "aff_attitude_difficult_breathing" "barriers_nothing" "barriers_money" "barriers_forget_carry"
[41] "barriers_forget_wear" "barriers_group_pressure" "barriers_medical_symptoms" "barriers_skin"
[45] "barriers_difficult_breathing" "barriers_eyeglasses_fog" "barriers_raspyvoice" "barriers_headache"
[49] "barriers_drymouth" "barriers_earpain" "barriers_general_uncomfy" "barriers_other"
[53] "effective_means_handwashing" "effective_means_masks" "effective_means_distance" "effective_means_ventilation"
[57] "risk_likely_contagion" "risk_contagion_absent_protection" "risk_severity" "risk_fear_spread"
[61] "risk_fear_contagion_self" "risk_fear_contagion_others" "risk_fear_restrictions" "sdt_needs_autonomy_1"
[65] "sdt_needs_competence_1" "sdt_needs_relatedness_1" "sdt_needs_autonomy_2" "sdt_needs_competence_2"
[69] "sdt_needs_relatedness_2" "sdt_motivation_extrinsic1" "sdt_motivation_amotivation_1" "sdt_motivation_identified_1"
[73] "sdt_motivation_introjected_1" "sdt_motivation_extrinsic_2" "sdt_motivation_introjected_2" "sdt_motivation_amotivation_2"
[77] "sdt_motivation_extrinsic_3" "sdt_motivation_identified_2" "sdt_motivation_identified_3" "sdt_motivation_identified_4"
[81] "sdt_motivation_amotivation_3" "sdt_motivation_introjected_3" "attention_check" "vaccination_status_intention_self"
[85] "vaccination_status_closeones" "covid_tested" "had_covid" "demographic_risk_group"
[89] "needprotection_before_shots" "needprotection_after_1_shot" "needprotection_after_2_shots"
fit_control <- trainControl(method = "repeatedcv",
number = 10, #10
repeats=10, #10
classProbs = TRUE)
grid <- expand.grid(depth = c(4, 6, 8),
learning_rate = 0.1,
iterations = 50, #500
l2_leaf_reg = 1e-3,
rsm = 0.95,
border_count = 64)
tictoc::tic()
cl <- makePSOCKcluster(10)
registerDoParallel(cl)
set.seed(69420)
report <- train(x, as.factor(make.names(y)),
method = catboost.caret,
logging_level = 'Silent', #'Verbose',
preProc = NULL,
tuneGrid = grid, trControl = fit_control)
stopCluster(cl)
tictoc::toc()
71.571 sec elapsed
registerDoSEQ()
report
Catboost
2272 samples
91 predictor
2 classes: 'X0', 'X1'
No pre-processing
Resampling: Cross-Validated (10 fold, repeated 10 times)
Summary of sample sizes: 2044, 2045, 2045, 2045, 2044, 2045, ...
Resampling results across tuning parameters:
depth Accuracy Kappa
4 0.7309843 0.4188090
6 0.7237668 0.4048342
8 0.7173828 0.3900885
Tuning parameter 'learning_rate' was held constant at a value of 0.1
Tuning parameter 'iterations' was held constant at a value of 50
Tuning parameter 'l2_leaf_reg'
was held constant at a value of 0.001
Tuning parameter 'rsm' was held constant at a value of 0.95
Tuning parameter 'border_count' was held constant at a value of 64
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were depth = 4, learning_rate = 0.1, iterations = 50, l2_leaf_reg = 0.001, rsm = 0.95 and border_count = 64.
report$results
importance <- varImp(report, scale = FALSE)
importance
custom variable importance
only 20 most important variables shown (out of 91)
x_pool <- catboost.load_pool(x)
model <- report$finalModel
model
CatBoost model (50 trees)
Loss function: Logloss
Fit to 91 features
shap_values <- catboost.get_feature_importance(
model,
pool = x_pool,
type = "ShapValues",
thread_count = -1,
fstr_type = NULL
)
shap_values_df <- data.frame(shap_values[, 1:ncol(x)])
colnames(shap_values_df) <- colnames(x)
shap_values_df_melt <- reshape2::melt(shap_values_df, value.name="shap_value")
No id variables; using all as measure variables
tmp_x <- data.frame(sapply(x, as.numeric))
actual_values_df_melt <- reshape2::melt(tmp_x, value.name="actual_value")
No id variables; using all as measure variables
shap_actual_df <- cbind(actual_values_df_melt, shap_values_df_melt["shap_value"])
# shap_actual_df[c("actual_value", "shap_value")] <- sapply(shap_actual_df[c("actual_value", "shap_value")], as.factor)
shap_actual_df[c("actual_value")] <- sapply(shap_actual_df[c("actual_value")], as.factor)
ggplot(shap_actual_df, aes(x=shap_value, y=variable, color=actual_value)) +
geom_jitter()
stop!
Error: unexpected '!' in "stop!"